##########################################
### Hypothesis Testing
### Samantha Zuhlke
###
### This R script introduces:
### - hypothesis testing
### - bivariate analyses
### - directional hypothesis testing
##########################################

# First thing, we set our preamble. 
# If you haven't already, install these packages.
# Install only the packages that are not installed yet, so re-running the
# script does not download everything again.
pkgs <- c("foreign", "readstata13", "ggplot2", "lattice", "car", "dplyr")
pkgs.installed <- vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)
if (any(!pkgs.installed)) {
  install.packages(pkgs[!pkgs.installed])
}

# Attach the packages. library() stops with an error when a package cannot be
# loaded; require() would only return FALSE and let the script fail later on.
library(foreign)
library(readstata13)
library(ggplot2) # two, not z
library(lattice)
library(car)
library(dplyr)

# Show the current working directory, change it, and confirm the change.
# Replace the placeholder below with your own folder before running.
getwd()
setwd("Insert file path")
getwd()
# Two reminders: 
##### everyone's working directory will be different.
##### As a reminder, every R script needs a preamble.

# Note, you may want to use the "here" package with students instead, 
## which makes setting workspaces much easier. 

##########################################

# clear the environment: remove every (non-hidden) object that ls() lists
rm(list = ls(all.names = FALSE))

# Note, it's not best practice to begin your R script with rm(list=ls()), 
#   because it only clears objects in your Global Environment and not other settings.
# It's better to start a new R session when you run a new script.
# However, I find that impractical when teaching multiple tutorials in a lab, 
#   and so use rm(list=ls()) in tutorials when moving between scripts.
# If you do the same, be sure to flag for your students that this isn't best practice!

##########################################
# 4 steps in hypothesis testing
##### 1. Write down your null hypothesis + research hypothesis (write down everything you know)
##### 2. Decide what statistical test to use 
##### 3. Conduct test, decide significance level (set alpha, calculate a z or t score, and p value)
##### 4. Interpret results (substance and significance)
###########################################

###########################################
# Import data 
###########################################
# Draw a random sample of 1000 diamonds.
# Fixing the seed first means everyone gets the same sample, so the numbers
# discussed in class can be reproduced.
set.seed(1234)
data <- diamonds %>% sample_n(1000)
# Or
# data <- diamonds

# In class question: how is our data distributed?

# examine our data and variables of interest: size (carat) and price.
summary(data)
summary(data$price)

# histograms of both variables with ggplot2
# (ggplot() + geom_histogram() replaces the deprecated qplot() shortcut)
ggplot(data, aes(x = carat)) + geom_histogram()
ggplot(data, aes(x = price)) + geom_histogram()

# we use the distribution of the data to inform what type of test we use
hist(data$carat)
hist(data$price)

###########################################
# Null hypothesis / research hypothesis (step 1) 
###########################################

# Research question: is the size of a diamond related to its price? 

# The null hypothesis of this analysis is that there is no relationship between diamond size and price.

###########################################
# decide which statistical test (step 2)
###########################################

# Examine the data

# Scatterplot
# plot() takes the INDEPENDENT VARIABLE as x and the DEPENDENT VARIABLE as y
plot(x = data$carat, y = data$price)

# the same scatterplot with a title and axis labels
plot(
  x = data$carat, y = data$price,
  main = "My Scatterplot!",
  xlab = "Carat", ylab = "Price"
)

# Overlay the fitted line of a simple linear regression
# formula: OUTCOME ~ PREDICTOR, data = DATA NAME
abline(
  coef = coef(lm(price ~ carat, data = data)),
  col = "red", lwd = 4, lty = 1
)

# both variables are continuous data, and we are interested in the relationship between them. 
# so, we should use a pearson correlation test.

# first, what do you expect the relationship between these variables to look like?
# price should increase as size of the diamond increases. 


###########################################
# conduct statistical test (step 3)
###########################################

# Correlation: how 2 variables "move" together
# cor(Y, X); Pearson is the default method, spelled out here
cor(x = data$price, y = data$carat, method = "pearson")

# in class questions:
# - does this correlation meet our substantive expectations?
# - is this a strong, medium, or weak correlation?
# - is this a statistically significant correlation?

# set alpha = 0.1

# Pearson Correlation (similarity)
# recall that Pearson tests are for comparing interval level data: IV and DV
# NULL = There is no relationship between the variables.
# arguments are given as y, x; the two-sided alternative is the default
cor.test(
  x = data$price, y = data$carat,
  alternative = "two.sided", method = "pearson"
)
# NULL = There is no relationship between the variables.


############################################
# interpret results (step 4)
############################################

# 1. what is the substantive relationship? does it meet our expectations that price will increase as size increases?
# 2. is the relationship statistically significant at alpha = 0.1? 

#######################################
#######################################
# Other types of bivariate analyses 
#######################################
# There are lots of types of bivariate analysis
# bivariate analyses: tests comparing two variables
# use different tests based on your research question and measurement structure of variables
# here are some of them: 
########################################

# Kendall's tau (similarity)
# recall that Kendall's tau compares ordinal level data: IV and DV
# NULL = There is no relationship between the variables.
# cor.test() needs numeric vectors, so calling it on the two factors raises an
# error. try() shows that error without stopping the rest of the script.
try(cor.test(data$clarity, data$cut, method = "kendall"))
# Why didn't this code work?
# Let's fix it:

# bar charts of the two categorical variables
# (ggplot() + geom_bar() replaces the deprecated qplot() shortcut)
ggplot(data, aes(x = clarity)) + geom_bar()
ggplot(data, aes(x = cut)) + geom_bar()
summary(data$clarity)

# as.numeric() on a factor returns its integer level codes, which gives
# cor.test() the numeric input it requires
num.clarity <- as.numeric(data$clarity)
num.cut <- as.numeric(data$cut)

# the method name is spelled out in full instead of relying on partial matching
cor.test(num.clarity, num.cut, method = "kendall")

# Chi square (similarity)
# Recall that chi square compares nominal level data: IV and DV
# NULL = No association between the variables
chisq.test(x = data$cut, y = data$clarity)

# or... build the cut-by-clarity contingency table first
mytable <- xtabs(~ cut + clarity, data = data)
mytable
# summary() of the table produces a chi-sq test of independence
summary(mytable)
# We reject the null. Therefore, there is association between the variables.

# t-tests
# t tests compare the means of groups. it asks: are the means statistically
# significantly different?

# independent 2-group t-test
# t.test(y, x) # where both variables are numeric (i.e. interval)
t.test(diamonds$price, diamonds$carat)
# NULL == "no meaningful difference in means"
# Do we reject or fail to reject the null?

# but you need to pay attention to the measurement of the variables
# independent 2-group t-test
# t.test(y ~ x) # where y is numeric and x is a binary factor
# cut has more than two categories, so look at its distribution first
# (ggplot() + geom_bar() replaces the deprecated qplot() shortcut)
ggplot(diamonds, aes(x = cut)) + geom_bar()

# examine the different group means
# summary statistics by group
# install.packages("psych")
library(psych)
describeBy(diamonds, diamonds$cut)
describeBy(diamonds$price, diamonds$cut)

# create two categories: fair ("0") and good - ideal ("1").
# do fair diamonds have a different mean price than other cuts?
diamonds$cutcat <- as.factor(ifelse(diamonds$cut == "Fair", "0", "1"))
# View(diamonds)
summary(diamonds$cutcat)

# NULL == "no meaningful difference in means"
# Do we reject or fail to reject the null?
t.test(price ~ cutcat, data = diamonds)


###########################################
# a few more (manual t-test) hypothesis testing examples
###########################################

# Example 1: N = 1000

# create a sample of the diamonds data
sample <- diamonds %>% sample_n(1000)

# Compute the means from the data instead of typing them in by hand:
# the sample is random, so its mean changes every time it is drawn.
sample.mean <- mean(sample$price) # "sample" mean
pop.mean <- mean(diamonds$price)  # "population" mean = 3932.8
print(sample.mean)
print(pop.mean)

# research question: does the sample mean = the population mean?

# H0 = population mean = 3932.8
# Ha = greater than the population mean (3932.8)

# how would we test?
# z = (sample mean - population mean) / standard error
se.sample <- sd(sample$price) / sqrt(nrow(sample))
z <- (sample.mean - pop.mean) / se.sample
print(z)
pnorm(z) # area to the left
1 - pnorm(z) # area to the right
pnorm(z, lower.tail = FALSE) # area to the right

# Compare the right-tail area with alpha. The sample was drawn from the
# population itself, so we usually fail to reject the null hypothesis.

# Example 2:

# research question: is the sample mean greater than 0?

print(sample.mean) # same "sample" mean as in example 1

# H0 = mean = 0
# Ha = mean is greater than 0 (matches the research question)

# how would we test?
se.sample <- sd(sample$price) / sqrt(nrow(sample))
z2 <- (sample.mean - 0) / se.sample
print(z2)

pnorm(z2) # area to the left
1 - pnorm(z2) # area to the right
pnorm(z2, lower.tail = FALSE) # area to the right

# we reject the null hypothesis

##########################################
# Directional hypothesis testing
#########################################
# Let's use another example - data on gifted children
# Research question: What age do gifted children learn to count to 10 at?

# install.packages("openintro")
# library() stops with an error if openintro is missing; require() would only
# return FALSE and the script would fail later at data(gifted).
library(openintro)
data(gifted)

# First let's get to know the data/variables.
# What is the code for summary statistics?

min(gifted$count) # referencing variables
# Interpretation: The minimum age in months that children first counted to 10 is 21 months.

mean(gifted$count)
# The mean age in months that children first counted to 10 is 31 months.

median(gifted$count)
# The median age in months that children first counted to 10 is 31 months.

sd(gifted$count)
# The standard deviation around the mean in months that children first counted to 10 is 4.3 months.

max(gifted$count)
# The max age in months that children first counted to 10 is 39 months.

# OR:
summary(gifted$count) # missing standard deviation
sd(gifted$count)

# histogram
hist(gifted$count)

# make it look a little prettier
# (ggplot() + geom_histogram() + labs() replaces the deprecated qplot() shortcut)
ggplot(gifted, aes(x = count)) +
  geom_histogram() +
  labs(x = "Age in Months", y = "Count", title = "Distribution of Count")

# Next, let's conduct a hypothesis test

# do we use a z or a t test, based on our histogram in part a? 
# What do we do when 30 < N < 50?

# What do we know? 
# Write down the null and research/alternative hypothesis, and 
#   everything else we know.

# hypothesized (null) average for gifted$count = 32 months

# H0: u = 32 months
# Ha: u < 32 months 
# N = 36
# a = 0.10

# Z test
# Sample size, mean and standard deviation are taken from the data; the
# standard deviation gets its own name so the sd() function is not masked.
n.gifted <- length(gifted$count)
mean.gifted <- mean(gifted$count)
sd.gifted <- sd(gifted$count)
# z = (observed mean - null mean) / standard error
se <- sd.gifted / sqrt(n.gifted)
z <- (mean.gifted - 32) / se
p <- pnorm(z) # left tail because Ha < 32
print(p)

# What direction are we interested in?

# p = 0.034 < 0.1
# So, we reject the null hypothesis that the average age is 32 months.
# The data provide evidence that the average age at which gifted children learn to
# count to 10 is lower than 32 months.

# What does this literally mean? If the true mean is 32 months, there is a 3% chance of seeing a
# mean of 30.7 (observed value) or lower.

# What if Ha was Ha: u > 32?
# then we need the area in the right tail instead
p2 <- 1 - p
print(p2)


# t test
# same statistic as the z score, but compared against the t distribution
# (stored as t.stat so the base function t() is not masked)
t.stat <- (mean.gifted - 32) / se
print(t.stat)

# degrees of freedom = N - 1
df <- length(gifted$count) - 1
print(df)

# critical value for a left-tailed test at alpha = 0.10
t.crit <- qt(0.10, df)
print(t.crit)

# In a printed t table (one tail, alpha = 0.10) t crit lies between
# 1.310 (df = 30) and 1.303 (df = 40).
# We reject the null hypothesis since |tstat| > |tcrit|


# Confidence intervals
# ci @ 90% = mean +/- z * s.e.   (with a t test: mean +/- tcrit * s.e.)

# a 90% confidence level leaves .10 / 2 = 0.05 in each tail -> z = 1.65

# lower and upper bound in one step: mean - margin, mean + margin
ci <- mean.gifted + c(-1, 1) * (1.65 * se)
lower <- ci[1]
upper <- ci[2]

print(ci)

# We are 90% confident that the average age at which gifted children first count to 10 successfully is 
# between 29.51 and 31.88 months old.

# The results from my hypothesis test and confidence interval agree. The hypothesis test resulted in the
# rejection of the null hypothesis, and the value of the null hypothesis (32) is not within the confidence interval.

##########################################
# Paired t-tests 
##########################################
# before, we conducted t-tests where the two groups were independent. 
# but, the groups may not always be independent from each other. 
# For example, consider partners.

# difference in iq within each mother-father pair, stored as diff.iq
diff.iq <- with(gifted, motheriq - fatheriq)

# summary statistics, standard deviation and histogram of the differences
sd.diff <- sd(diff.iq)
diff <- summary(diff.iq)
print(diff)
print(sd.diff)
hist(diff.iq)

# The mean difference in iq between mothers and fathers is 3.4 points. The standard deviation is 7.5.
# On average, each observation is about 7.5 IQ points away from the sample mean.

# conduct a paired t test between mother iq and father iq
t.test(gifted$motheriq, gifted$fatheriq, paired = TRUE)
# p = 0.01, therefore we reject the null hypothesis that the difference in means is equal to 0.

# manually calculate a paired t-test
# H0: mu = 0
# Ha: mu does not equal 0
# t = (mean difference - hypothesized difference) / s.e. of the difference
# a = 0.05 (set by me)
# The mean difference and number of pairs come from diff.iq rather than being
# typed in. The statistic is stored as t.paired because assigning to T would
# overwrite R's shorthand for TRUE.
n.pairs <- length(diff.iq)
se <- sd(diff.iq) / sqrt(n.pairs)
t.paired <- (mean(diff.iq) - 0) / se
print(t.paired)

# degrees of freedom = number of pairs - 1
df <- n.pairs - 1
print(df)

# two-tailed critical value at alpha = 0.05
t.crit <- qt(1 - 0.05 / 2, df)
print(t.crit)

# in a printed t table, t critical is between 2.042 and 2.021
# |t statistic| = 2.73 > t critical
# Therefore, I reject the null hypothesis that there is no difference between mothers' and fathers' iqs
# The data provide evidence that the iqs of mothers and fathers are different.



